Inspect Data

# Attach the packages used in this report; tidyverse's startup messages are
# suppressed.
suppressPackageStartupMessages(library(tidyverse))
library(targets)
library(tarchetypes)
library(DT)
# Evaluate code chunks relative to the directory two levels above this file
# (presumably the project root that holds the targets store -- confirm).
knitr::opts_knit$set(root.dir = "../../")

Modelling Matrix

Original

# Pull the modelling matrix out of the targets data store.
df_mm <- tar_read(df_mm)

# Print it with the identifier and outcome columns moved to the front.
select(df_mm, category_id, activity_id_new, has_finding, everything())
## # A tibble: 4,055 × 185
##    category_id activity_id_new has_finding start_date n_visit n_unsch_visit n_sched_visit ratio_unsch_visit ratio_unsch_visit_rnk  n_ae n_sae ae_per_visit sae_per_visit ae_per_visit_rnk
##    <chr>       <chr>           <chr>       <date>       <dbl>         <dbl>         <dbl>             <dbl>                 <dbl> <dbl> <dbl>        <dbl>         <dbl>            <dbl>
##  1 cnsn        00001           yes         2015-01-01     732            26           706            0.0355                 0.289   178     4       0.243        0.00546            0.206
##  2 cnsn        00003           yes         2014-01-01      NA            NA            NA           NA                     NA        NA    NA      NA           NA                 NA    
##  3 cnsn        00004           yes         2015-01-01     181            33           148            0.182                  0.761   108     8       0.597        0.0442             0.701
##  4 cnsn        00006           yes         2014-01-01       0             0             0          NaN                      0.990     6     0      NA           NA                  0    
##  5 cnsn        00008           yes         2015-01-01     114             0           114            0                      0.273     3     0       0.0263       0                  0.481
##  6 cnsn        00009           yes         2015-01-01     297             0           297            0                      0        23     5       0.0774       0.0168             0.243
##  7 cnsn        00012           yes         2015-01-01     446             7           439            0.0157                 0.333   147    17       0.330        0.0381             0.410
##  8 cnsn        00013           yes         2015-01-01     688            65           623            0.0945                 0.594   172    27       0.25         0.0392             0.203
##  9 cnsn        00015           yes         2015-01-01     141            17           124            0.121                  0.717    69     3       0.489        0.0213             0.767
## 10 cnsn        00016           yes         2015-01-01       0             0             0          NaN                      0.433    32     5      NA           NA                  0.433
## # … with 4,045 more rows, and 171 more variables: sae_per_visit_rnk <dbl>, median_ae_reporting_delay <dbl>, mean_ae_reporting_delay <dbl>, max_ae_reporting_delay <dbl>,
## #   median_sae_reporting_delay <dbl>, mean_sae_reporting_delay <dbl>, max_sae_reporting_delay <dbl>, n_patients <dbl>, therapeutic_area <chr>, n_active_sites_pi_yy <dbl>,
## #   n_active_sites_pi_yy_rnk <dbl>, n_active_trials_at_site_in_ta_yy <dbl>, n_active_trials_at_site_in_ta_yy_rnk <dbl>, time_on_study_dd <dbl>, dev_data_available <chr>,
## #   n_maj_dev <dbl>, n_min_dev <dbl>, n_maj_dev_per_daysonstudy <dbl>, n_min_dev_per_daysonstudy <dbl>, n_maj_dev_per_daysonstudy_rnk <dbl>, n_min_dev_per_daysonstudy_rnk <dbl>,
## #   issue_data_available <chr>, mean_iss_completion_time <dbl>, median_iss_completion_time <dbl>, max_iss_completion_time <dbl>, n_iss_open <dbl>, n_iss_open_per_pat <dbl>,
## #   n_iss_due <dbl>, n_iss_compl <dbl>, n_iss_compl_per_daysonstudy <dbl>, n_iss_late <dbl>, n_iss_cnsn_open <dbl>, n_iss_cnsn_due <dbl>, n_iss_cnsn_compl <dbl>,
## #   n_iss_cnsn_late <dbl>, n_iss_dtin_open <dbl>, n_iss_dtin_due <dbl>, n_iss_dtin_compl <dbl>, n_iss_dtin_late <dbl>, n_iss_ptpe_open <dbl>, n_iss_ptpe_due <dbl>, …
# Interactive, searchable listing of every column name in the modelling matrix.
DT::datatable(tibble(columns = colnames(df_mm)))

Binned

# Pull the binned modelling matrix out of the targets data store.
df_mm_bin <- tar_read(df_mm_bin)

# Print it with the identifier and outcome columns moved to the front.
select(df_mm_bin, category_id, activity_id_new, has_finding, everything())
## # A tibble: 4,055 × 861
##    category_id activity_id_new has_finding nvisitLL nvisitML nvisitM nvisitMH nvisitHH nvisitNA nunschvisitLL nunschvisitML nunschvisitM nunschvisitMH nunschvisitHH nunschvisitNA
##    <chr>       <chr>           <chr>          <dbl>    <dbl>   <dbl>    <dbl>    <dbl>    <dbl>         <dbl>         <dbl>        <dbl>         <dbl>         <dbl>         <dbl>
##  1 cnsn        00001           yes                0        0       0        1        0        0             0             0            0             1             0             0
##  2 cnsn        00003           yes                0        0       0        0        0        1             0             0            0             0             0             1
##  3 cnsn        00004           yes                0        0       1        0        0        0             0             0            0             1             0             0
##  4 cnsn        00006           yes                1        0       0        0        0        0             1             0            0             0             0             0
##  5 cnsn        00008           yes                0        1       0        0        0        0             1             0            0             0             0             0
##  6 cnsn        00009           yes                0        0       1        0        0        0             1             0            0             0             0             0
##  7 cnsn        00012           yes                0        0       0        1        0        0             0             1            0             0             0             0
##  8 cnsn        00013           yes                0        0       0        1        0        0             0             0            0             0             1             0
##  9 cnsn        00015           yes                0        1       0        0        0        0             0             0            1             0             0             0
## 10 cnsn        00016           yes                1        0       0        0        0        0             1             0            0             0             0             0
## # … with 4,045 more rows, and 846 more variables: nschedvisitLL <dbl>, nschedvisitML <dbl>, nschedvisitM <dbl>, nschedvisitMH <dbl>, nschedvisitHH <dbl>, nschedvisitNA <dbl>,
## #   ratiounschvisitLL <dbl>, ratiounschvisitML <dbl>, ratiounschvisitM <dbl>, ratiounschvisitMH <dbl>, ratiounschvisitHH <dbl>, ratiounschvisitNA <dbl>, ratiounschvisitrnkLL <dbl>,
## #   ratiounschvisitrnkML <dbl>, ratiounschvisitrnkM <dbl>, ratiounschvisitrnkMH <dbl>, ratiounschvisitrnkHH <dbl>, ratiounschvisitrnkNA <dbl>, naeLL <dbl>, naeML <dbl>, naeM <dbl>,
## #   naeMH <dbl>, naeHH <dbl>, naeNA <dbl>, nsaeLL <dbl>, nsaeML <dbl>, nsaeM <dbl>, nsaeMH <dbl>, nsaeHH <dbl>, nsaeNA <dbl>, aepervisitLL <dbl>, aepervisitML <dbl>,
## #   aepervisitM <dbl>, aepervisitMH <dbl>, aepervisitHH <dbl>, aepervisitNA <dbl>, saepervisitLL <dbl>, saepervisitML <dbl>, saepervisitM <dbl>, saepervisitMH <dbl>,
## #   saepervisitHH <dbl>, saepervisitNA <dbl>, aepervisitrnkLL <dbl>, aepervisitrnkML <dbl>, aepervisitrnkM <dbl>, aepervisitrnkMH <dbl>, aepervisitrnkHH <dbl>, aepervisitrnkNA <dbl>,
## #   saepervisitrnkLL <dbl>, saepervisitrnkML <dbl>, saepervisitrnkM <dbl>, saepervisitrnkMH <dbl>, saepervisitrnkHH <dbl>, saepervisitrnkNA <dbl>, medianaereportingdelayLL <dbl>, …
# Interactive, searchable listing of every column name in the binned matrix.
DT::datatable(tibble(columns = colnames(df_mm_bin)))

Coefficients

Modelling coefficients have been preselected.

# Show the `df_form` target as an interactive table.
DT::datatable(tar_read(df_form))

Cross Validation Indices

Indices of the modelling matrix that define the time-series cross-validation strategy.

# Print the `df_cv` target. Per the output below, each row pairs a start year
# and category with comma-separated index strings for past data and for the
# following year.
tar_read(df_cv)
## # A tibble: 45 × 4
##    year_start_act category_id index_past                                                                  index_next_year                                                              
##             <dbl> <chr>       <chr>                                                                       <chr>                                                                        
##  1           2011 cnsn        70,71,72,84,85,86,87,88,89,90,91,92,93,94,95,96,108,109,110,111,113,114,11… 155,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,187,188,189,…
##  2           2012 cnsn        70,71,72,84,85,86,87,88,89,90,91,92,93,94,95,96,108,109,110,111,113,114,11… 211,229,231,234,241,242,243,244,245,246,247,249,257,258,259,260,261,262,263,…
##  3           2013 cnsn        70,71,72,84,85,86,87,88,89,90,91,92,93,94,95,96,108,109,110,111,113,114,11… 2,4,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,…
##  4           2014 cnsn        2,4,70,71,72,84,85,86,87,88,89,90,91,92,93,94,95,96,108,109,110,111,113,11… 1,3,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,31…
##  5           2015 cnsn        1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28… 30,34,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,6…
##  6           2016 cnsn        1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28… 98,99,100,101,102,103,106,141,142,143,144,145,146,161,162,163,164,165,166,16…
##  7           2017 cnsn        1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28… 228,238,239,240,248,251,252,253,254,255,256,611,633,650,651,652,653,654,655,…
##  8           2018 cnsn        1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28… 358,359,360,361,362,363,364,365,366,367,369,370,371,698,785,786,787,788,789,…
##  9           2019 cnsn        1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28… 368,805,808                                                                  
## 10           2015 dtin        5678,5679,5680,5681,5682,5683,5684,5685,5686,5687,5688,5689,5690,5691,5692… 5698,5714,5715,5724,5725,5726,5727,5728,5729,5730,5731,5732,5733,5734,5735,5…
## # … with 35 more rows

Lookup Tables

Features

Names of all features and their variations.

# Show the feature lookup target as an interactive table.
DT::datatable(tar_read(df_feat_lookup))

Categories

All finding statements mapped to clinical impact factors.

# Show the category lookup target as an interactive table.
DT::datatable(tar_read(df_cat_lookup))